geom_line(size=0.75)+
geom_point(size=3)+
facet_wrap(~ variable,
#scales="free",
labeller=as_labeller(c(ats_admin="Average Transition Score",
full="Fraction Seats Occupied",
va_combined="Value Added",
mts_admin="Minimum Transition Score")))+
geom_vline(xintercept=-0.5)+
theme(legend.key.size = unit(0.8, 'cm'),
legend.position = "right",
legend.title.align=0.5,
legend.text = element_text(size=12),
legend.title=element_text(size=12),
plot.title = element_text(size = 15),
legend.justification = c("right"),
plot.subtitle = element_text(size=20, hjust = 0.5, vjust=-4),
strip.text.x = element_text(size = 12),
axis.text.x=element_text(color = "black", size=12),
axis.text.y=element_text(color = "black", size=12),
panel.grid.minor = element_line(linewidth = 0.5),
panel.grid.major = element_line(linewidth = 0.75),legend.spacing.y = unit(10, 'pt'))+
guides(color=guide_legend(title="Track Type",byrow=T))+
#scale_color_manual(labels = c("Closed", "Non-Closed")) +
scale_x_continuous(breaks=-4:3)+
#scale_y_continuous(limits=c(-0.75,0.75))+
xlab("Years Relative to Closure") +
ylab("Standard Deviations")
# Show the pooled figure, then write it to a PDF in the directory of the
# script currently open in RStudio.
# print() is called explicitly: a bare `g` is only drawn by top-level
# auto-printing, so nothing would be rendered when this file is source()d.
print(g)
#pooled
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
pdf("event_study_shrinking_pooled_with_counterfactual_v2_difference.pdf",
    width = 7.5, height = 4.5)
# `finally` closes the PDF device even if rendering the plot fails.
invisible(tryCatch(print(g), finally = dev.off()))
# Write one stand-alone (non-faceted) PDF per outcome from `data_graph_event`.
# The four figures share one plot specification and differ only in the value
# of `variable` that is kept and in the file-name suffix, so they are produced
# in a single loop instead of four copy-pasted blocks.
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
# file-name suffix -> value of `variable` shown in that file
single_panel_vars <- c(ats = "ats_admin",
                       va_combined = "va_combined",
                       full = "full",
                       mts = "mts_admin")
for (file_suffix in names(single_panel_vars)) {
  panel_var <- single_panel_vars[[file_suffix]]
  panel_plot <- ggplot(data_graph_event %>% filter(variable == panel_var),
                       aes(x = year_event, y = value, group = group, color = group)) +
    # `linewidth` replaces the deprecated `size` argument for lines
    geom_line(linewidth = 0.75) +
    geom_point(size = 3) +
    geom_vline(xintercept = -0.5) +
    theme(legend.key.size = unit(0.8, 'cm'),
          legend.position = "right",
          legend.title.align = 0.5,
          legend.text = element_text(size = 12),
          legend.title = element_text(size = 12),
          plot.title = element_text(size = 15),
          legend.justification = c("right"),
          plot.subtitle = element_text(size = 20, hjust = 0.5, vjust = -4),
          strip.text.x = element_text(size = 12),
          axis.text.x = element_text(color = "black", size = 12),
          axis.text.y = element_text(color = "black", size = 12),
          panel.grid.minor = element_line(linewidth = 0.5),
          panel.grid.major = element_line(linewidth = 0.75),
          legend.spacing.y = unit(10, 'pt')) +
    guides(color = guide_legend(title = "Track Type", byrow = TRUE)) +
    scale_x_continuous(breaks = -4:3) +
    xlab("Years Relative to Closure") +
    ylab("Standard Deviations")
  pdf(paste0("event_study_shrinking_pooled_with_counterfactual_v2_difference_",
             file_suffix, ".pdf"),
      width = 7.5, height = 4.5)
  # Explicit print(): ggplot objects are not auto-printed inside a loop.
  # `finally` closes the device even if rendering fails.
  tryCatch(print(panel_plot), finally = dev.off())
}
# Track-year rows for tracks flagged as closing.
# Per track (us2B): `close` is TRUE when no row has expand == TRUE and at
# least one row has shrink == TRUE; `close_yr` is the smallest year_admit among
# the rows with shrink == TRUE. Only rows with close_yr >= 2011 are kept.
data_graph_event_close <- data_track_yr %>%
  group_by(us2B) %>%
  mutate(
    close = !any(expand == TRUE) & any(shrink == TRUE),
    close_yr = min(year_admit[shrink == TRUE])
  ) %>%
  #filter(!grepl("OT",us2B)) %>%
  filter(close) %>%
  ungroup() %>%
  #mutate(full=n_students/n_seats) %>%
  # Join against the full year x track grid. The keys are spelled out; they
  # are exactly the columns the grid contains.
  # NOTE(review): rows contributed only by the grid carry NA in `close_yr`,
  # so the final `close_yr >= 2011` filter removes them again and this join
  # currently adds no rows. If a balanced panel was intended, `close_yr`
  # would have to be filled within us2B before filtering -- confirm.
  full_join(
    data_track_yr %>%
      ungroup() %>%
      expand(year_admit, nesting(us2B, track_type, town, county_short)),
    by = c("year_admit", "us2B", "track_type", "town", "county_short"),
    suffix = c("_old", "")
  ) %>%
  select(track_type, us2B, close_yr, county_short, town, year_admit,
         va_combined, ats_admin, mts_admin, full) %>%
  filter(close_yr >= 2011)
# Comparison rows that are later joined to the closing tracks by
# (year_admit, town, county_short) under the `__non_close` suffix.
# Steps, as written:
#   1. per track (us2B), `close` is TRUE when the track has no shrink == TRUE row;
#   2. keep towns containing at least one track with close == TRUE;
#   3. within those towns, keep the tracks for which `close` is not TRUE.
# NOTE(review): step 3 retains tracks that DO have a shrink == TRUE row, which
# looks at odds with the "non-closing" label used downstream. The negation in
# step 1 may be unintended (i.e. `close = any(shrink == TRUE)`) -- confirm
# before relying on this series. Behaviour is left unchanged here.
data_graph_event_control <- data_track_yr %>%
  group_by(us2B) %>%
  mutate(close = !any(shrink == TRUE)) %>%
  #filter(!grepl("OT",us2B)) %>%
  group_by(town) %>%
  filter(any(close)) %>%
  group_by(us2B) %>%
  filter(!any(close)) %>%
  ungroup() %>%
  #mutate(full=n_students/n_seats) %>%
  select(county_short, town, year_admit, va_combined, ats_admin, mts_admin, full)
# Trailing three-observation mean: element i is (x[i] + x[i-1] + x[i-2]) / 3.
# The first two positions (and any window touching an NA) are NA.
# `x` is expected to be a numeric vector already sorted in time order.
mean_3 <- function(x) {
  # Shift by position in base R instead of calling a bare `lag()`: unless
  # dplyr's version is the one found on the search path, `lag()` resolves to
  # stats::lag(), which does not shift a plain vector and would silently turn
  # this into (x + x + x) / 3.
  shift_back <- function(k) {
    n_keep <- max(length(x) - k, 0L)
    c(rep(NA, length(x) - n_keep), x[seq_len(n_keep)])
  }
  (x + shift_back(1L) + shift_back(2L)) / 3
}
# For every (town, year_admit, county_short): the track (us2B) with the lowest
# trailing three-row mean of each metric. mean_3() works on row position, so
# rows are sorted by track and year first.
# NOTE(review): mean_3() assumes consecutive rows of a track are consecutive
# years; gaps in year_admit are not handled -- confirm the panel has none.
data_graph_event_best <- data_track_yr %>%
  select(county_short, town, year_admit, va_combined, ats_admin, mts_admin, full, us2B) %>%
  arrange(us2B, year_admit) %>%
  group_by(town, county_short, us2B) %>%
  mutate(
    mts_mean = mean_3(mts_admin),
    ats_mean = mean_3(ats_admin),
    va_combined_mean = mean_3(va_combined),
    full_mean = mean_3(full)
  ) %>%
  group_by(town, year_admit, county_short) %>%
  summarize(
    # which.min() returns integer(0) when every value is NA; `[1]` turns that
    # into NA so the summary still yields exactly one value per group.
    min_mts = us2B[which.min(mts_mean)[1]],
    min_ats = us2B[which.min(ats_mean)[1]],
    min_va = us2B[which.min(va_combined_mean)[1]],
    min_full = us2B[which.min(full_mean)[1]],
    # return an ungrouped table instead of one still grouped by town/year
    .groups = "drop"
  )
# Same as the town-level "best" table, but the minimum is taken within
# track_type: for every (track_type, town, year_admit, county_short), the track
# (us2B) with the lowest trailing three-row mean of each metric.
# NOTE(review): mean_3() assumes consecutive rows of a track are consecutive
# years; gaps in year_admit are not handled -- confirm the panel has none.
data_graph_event_best_type <- data_track_yr %>%
  select(track_type, county_short, town, year_admit, va_combined, ats_admin,
         mts_admin, full, us2B) %>%
  arrange(us2B, year_admit) %>%
  group_by(town, county_short, us2B) %>%
  mutate(
    mts_mean = mean_3(mts_admin),
    ats_mean = mean_3(ats_admin),
    va_combined_mean = mean_3(va_combined),
    full_mean = mean_3(full)
  ) %>%
  group_by(track_type, town, year_admit, county_short) %>%
  summarize(
    # which.min() returns integer(0) when every value is NA; `[1]` turns that
    # into NA so the summary still yields exactly one value per group.
    min_mts = us2B[which.min(mts_mean)[1]],
    min_ats = us2B[which.min(ats_mean)[1]],
    min_va = us2B[which.min(va_combined_mean)[1]],
    min_full = us2B[which.min(full_mean)[1]],
    # return an ungrouped table instead of one still grouped
    .groups = "drop"
  )
# Build the plotting table: one mean `value` per (year_event, group, variable).
# Columns are suffixed `__close`, `__non_close`, `__same_type` and `__best`
# while joining; pivot_longer() then splits each name at "__" into `variable`
# and `group`.
# NOTE(review): the first join is keyed on (year_admit, town, county_short)
# only, so a town-year with several rows on both sides presumably yields every
# pairing, and all pairings enter the means below -- confirm that the implied
# weighting is intended.
data_graph_event <- data_graph_event_close %>%
  #merge non-closing tracks in same town
  left_join(data_graph_event_control,
            by = c("year_admit", "town", "county_short"),
            suffix = c("", "__non_close")) %>%
  #merge data with best-case scenario of closure
  left_join(data_graph_event_best,
            by = c("close_yr" = "year_admit", "town", "county_short")) %>%
  #merge data with best-case scenario of closure of same track type
  left_join(data_graph_event_best_type,
            by = c("close_yr" = "year_admit", "town", "county_short", "track_type"),
            suffix = c("", "__same_type")) %>%
  #get the actual metrics of the "best" track to close OF THE SAME TYPE
  left_join(data_track_yr %>% select(year_admit, town, county_short, ats_admin, us2B),
            by = c("year_admit", "town", "county_short", "min_ats__same_type" = "us2B"),
            suffix = c("", "__same_type")) %>%
  left_join(data_track_yr %>% select(year_admit, town, county_short, mts_admin, us2B),
            by = c("year_admit", "town", "county_short", "min_mts__same_type" = "us2B"),
            suffix = c("", "__same_type")) %>%
  left_join(data_track_yr %>% select(year_admit, town, county_short, va_combined, us2B),
            by = c("year_admit", "town", "county_short", "min_va__same_type" = "us2B"),
            suffix = c("", "__same_type")) %>%
  left_join(data_track_yr %>% select(year_admit, town, county_short, full, us2B),
            by = c("year_admit", "town", "county_short", "min_full__same_type" = "us2B"),
            suffix = c("", "__same_type")) %>%
  #get the actual metrics of the "best" track to close
  # (these joins also rename the closing track's own metrics to `__close`)
  left_join(data_track_yr %>% select(year_admit, town, county_short, ats_admin, us2B),
            by = c("year_admit", "town", "county_short", "min_ats" = "us2B"),
            suffix = c("__close", "__best")) %>%
  left_join(data_track_yr %>% select(year_admit, town, county_short, mts_admin, us2B),
            by = c("year_admit", "town", "county_short", "min_mts" = "us2B"),
            suffix = c("__close", "__best")) %>%
  left_join(data_track_yr %>% select(year_admit, town, county_short, va_combined, us2B),
            by = c("year_admit", "town", "county_short", "min_va" = "us2B"),
            suffix = c("__close", "__best")) %>%
  left_join(data_track_yr %>% select(year_admit, town, county_short, full, us2B),
            by = c("year_admit", "town", "county_short", "min_full" = "us2B"),
            suffix = c("__close", "__best")) %>%
  # the track identifiers were only needed as join keys
  select(-min_ats, -min_va, -min_full, -min_mts,
         -min_ats__same_type, -min_va__same_type, -min_full__same_type,
         -min_mts__same_type) %>%
  pivot_longer(cols = va_combined__close:full__best,
               names_to = c("variable", "group"),
               values_to = "value",
               names_sep = "__") %>%
  # event time, offset by one year relative to close_yr
  mutate(year_event = year_admit - close_yr - 1) %>%
  #filter(close_yr %in% 2010:2013) %>%
  group_by(year_event, group, variable) %>%
  # drop the grouping so later steps run on a plain table and do not have to
  # modify a grouping column
  summarize(value = mean(value, na.rm = TRUE), .groups = "drop") %>%
  filter(year_event %in% -4:3) %>%
  mutate(
    # legend labels, in display order; codes not listed become NA
    group = factor(group,
                   levels = c("non_close", "close", "best", "same_type"),
                   labels = c("Non-Contracting",
                              "Contracting",
                              str_wrap("Best Feasible Contraction", 20),
                              str_wrap("Best Feasible Contraction (Same Specialization)", 20))),
    # facet order
    variable = factor(variable,
                      levels = c("ats_admin", "mts_admin", "full", "va_combined"))
  )
# Faceted event-study figure: one panel per outcome, one coloured line per
# group, with a vertical marker at x = -0.5.
g <- ggplot(data_graph_event,
            aes(x = year_event, y = value, group = group, color = group)) +
  # `linewidth` replaces the deprecated `size` argument for lines
  geom_line(linewidth = 0.75) +
  geom_point(size = 3) +
  facet_wrap(~ variable,
             #scales="free",
             labeller = as_labeller(c(ats_admin = "Average Transition Score",
                                      mts_admin = "Minimum Transition Score",
                                      full = "Fraction Seats Occupied",
                                      va_combined = "Value Added"))) +
  geom_vline(xintercept = -0.5) +
  theme(legend.key.size = unit(0.8, 'cm'),
        legend.position = "right",
        legend.title.align = 0.5,
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 12),
        plot.title = element_text(size = 15),
        legend.justification = c("right"),
        plot.subtitle = element_text(size = 20, hjust = 0.5, vjust = -4),
        strip.text.x = element_text(size = 12),
        axis.text.x = element_text(color = "black", size = 12),
        axis.text.y = element_text(color = "black", size = 12),
        panel.grid.minor = element_line(linewidth = 0.5),
        panel.grid.major = element_line(linewidth = 0.75),
        legend.spacing.y = unit(10, 'pt')) +
  guides(color = guide_legend(title = "Track Type", byrow = TRUE)) +
  #scale_color_manual(labels = c("Closed", "Non-Closed")) +
  scale_x_continuous(breaks = -4:3) +
  #scale_y_continuous(limits=c(-0.75,0.75))+
  xlab("Years Relative to Closure") +
  ylab("Standard Deviations")
# explicit print() so the figure is also drawn when the file is source()d
print(g)
#pooled
# Write the faceted figure `g` as a PDF and as a 300 dpi TIFF in the directory
# of the script currently open in RStudio.
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
pdf("event_study_shrinking_pooled_with_counterfactual_v3_level_3_yrs.pdf",
    width = 7.5, height = 4.5)
# Explicit print() so the plot is drawn even when the file is source()d;
# `finally` closes the device even if rendering fails.
invisible(tryCatch(print(g), finally = dev.off()))
tiff("event_study_shrinking_pooled_with_counterfactual_v3_level_3_yrs.tiff",
     units = "in", width = 10, height = 6, res = 300)
invisible(tryCatch(print(g), finally = dev.off()))
# Write one stand-alone (non-faceted) PDF per outcome from `data_graph_event`.
# The four figures share one plot specification and differ only in the value
# of `variable` that is kept and in the file-name suffix, so they are produced
# in a single loop instead of four copy-pasted blocks.
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
# file-name suffix -> value of `variable` shown in that file
single_panel_vars <- c(ats = "ats_admin",
                       mts = "mts_admin",
                       va_combined = "va_combined",
                       full = "full")
for (file_suffix in names(single_panel_vars)) {
  panel_var <- single_panel_vars[[file_suffix]]
  panel_plot <- ggplot(data_graph_event %>% filter(variable == panel_var),
                       aes(x = year_event, y = value, group = group, color = group)) +
    # `linewidth` replaces the deprecated `size` argument for lines
    geom_line(linewidth = 0.75) +
    geom_point(size = 3) +
    geom_vline(xintercept = -0.5) +
    theme(legend.key.size = unit(0.8, 'cm'),
          legend.position = "right",
          legend.title.align = 0.5,
          legend.text = element_text(size = 12),
          legend.title = element_text(size = 12),
          plot.title = element_text(size = 15),
          legend.justification = c("right"),
          plot.subtitle = element_text(size = 20, hjust = 0.5, vjust = -4),
          strip.text.x = element_text(size = 12),
          axis.text.x = element_text(color = "black", size = 12),
          axis.text.y = element_text(color = "black", size = 12),
          panel.grid.minor = element_line(linewidth = 0.5),
          panel.grid.major = element_line(linewidth = 0.75),
          legend.spacing.y = unit(10, 'pt')) +
    guides(color = guide_legend(title = "Track Type", byrow = TRUE)) +
    scale_x_continuous(breaks = -4:3) +
    xlab("Years Relative to Closure") +
    ylab("Standard Deviations")
  pdf(paste0("event_study_shrinking_pooled_with_counterfactual_v3_level_3_yrs_",
             file_suffix, ".pdf"),
      width = 7.5, height = 4.5)
  # Explicit print(): ggplot objects are not auto-printed inside a loop.
  # `finally` closes the device even if rendering fails.
  tryCatch(print(panel_plot), finally = dev.off())
}
# Scratch check: a count expressed in thousands, rounded to a whole number and
# rendered as text.
format(round(1200 / 1000, digits = 0), nsmall = 0)
format(round(222200 / 1000, digits = 0), nsmall = 0)
# Preamble
# memory.limit(size = 65000)
# Load packages and set working directory
pacman::p_load(
  char = c(
    "tidyverse",
    "data.table",
    "ggplot2",
    "readxl",
    "xtable",
    "scales" # negative log scale in ggplot
  )
)
# Packages used at some point but currently not loaded:
#   R.utils, haven, fixest,
#   bookdown, knitr, modelsummary, forcats, scales, geofacet,
#   fuzzyjoin,
#   kableExtra  (scales used for percentage in kableExtra),
#   datapasta,
#   xtable      (export table to latex easily),
#   zoo         (linear interp),
#   ggrepel     (makes ggplot labeling easy),
#   viridis     (viridis palette),
#   cowplot,
#   gridExtra   (several graphs on same plot),
#   ggh4x,
#   stringi     (remove non-unicode),
#   ggdark      (variable scales for figure 1),
#   PTXQC       (longest substring),
#   tm          (remove words)
# Sys.setlocale("LC_ALL", 'en_US.UTF-8')
# Project root (note the trailing slash) and its sub-directories.
wd <- 'C:/users/andre/Dropbox/Research/2022 Ontario/'
#wd<-('C:/Users/MUNTEANU_A/Dropbox/Research/2022 Ontario/')
#wd<-('F:/Dropbox/Dropbox/Andrei projects/Census/')
#wd<-('C:/Andrei/Dropbox/Andrei projects/Census/')
# `wd` already ends in "/", so the sub-directory names must not start with one;
# otherwise the result contains a doubled separator (".../2022 Ontario//data/").
wd_data <- paste0(wd, 'data/')
wd_code <- paste0(wd, 'code/')
wd_figures <- paste0(wd, 'figures/')
# Load (installing first if missing) the packages used by the replication code.
pacman::p_load(
  char = c(
    "stringdist",
    "kableExtra",
    "openxlsx",
    "fixest",
    "ggplot2",
    "scales",
    "modelsummary",
    "readxl",
    "stringr",
    "tidyr",
    "dplyr"
  )
)
# File paths: project root (with trailing slash) and the folders derived from it.
wd <- "C:/Users/MUNTEANU_A/Dropbox/Research/2018 JMP/"
wd_code <- sprintf("%scode/replication/", wd)
wd_data_intermediate <- sprintf("%sdata/intermediate/", wd)
wd_data_final <- sprintf("%sdata/final/", wd)
# Read the RDS inputs from the final-data folder, then list the column names of
# the student and expenditure tables.
setwd(wd_data_final)
data_student_raw <- readRDS(file = "data_student_anon")
data_teacher_raw <- readRDS(file = 'data_teacher_anon')
data_exp_raw <- readRDS(file = 'data_expenditure_anon')
data_student_teacher_raw <- readRDS(file = 'data_student_teacher_anon')
data_student_exp_raw <- readRDS(file = 'data_student_expenditure_anon')
data_student_teacher_exp_raw <- readRDS(file = "data_student_teacher_expenditure_anon")
openings <- readRDS(file = 'openings_anon')

colnames(data_student_raw)
colnames(data_exp_raw)
# Reduce the merged student-teacher-expenditure table to the columns whose
# names match the first pattern, drop those matching the second, and save the
# result.
setwd(wd_data_final)
data_student_teacher_expenditure_anon <- readRDS("data_student_teacher_expenditure_anon") %>%
  select(matches("SIRUTA|SIIIR|judet|^an|liceu|school_h|scoala_de|town|specializare|id_|unitate|n_|dist|rezultat|school_change|entrance_|grad_|class_|school_|dec|quart|med|Wages_hs|drop|Unemployment_hs|teacher|County|Year|Exp|ValoareEUR|Type|subject|mandatory|elective|disciplina|lb_romana|gpa|Exeprience|Category", perl = TRUE)) %>%
  select(-matches("cls|scl|_ID_|opening", perl = TRUE)) %>%
  ungroup()
# NOTE(review): unlike the sibling saves, the output name has no "_anon"
# suffix, so the input file is not overwritten -- confirm this is intended.
saveRDS(data_student_teacher_expenditure_anon, "data_student_teacher_expenditure", compress = TRUE)
# Reduce the expenditure table to the columns whose names match the pattern and
# save it, then list the remaining column names.
data_expenditure_anon <- readRDS('data_expenditure_anon') %>%
  select(matches("town|judet|ValoareEUR|^an|Type|unitate", perl = TRUE)) %>%
  ungroup()
# NOTE(review): this writes to the same file name that was just read, i.e. the
# input is overwritten with the reduced table -- confirm this is intended.
saveRDS(data_expenditure_anon, "data_expenditure_anon", compress = TRUE)
colnames(data_expenditure_anon)
# Reduce the teacher table to the columns whose names match the first pattern,
# drop those matching the second, save it, and inspect the result.
data_teacher_anon <- readRDS('data_teacher_anon') %>%
  select(matches("SIRUTA|SIIIR|judet|^an|liceu|school_h|scoala_de|town|specializare|id_|unitate|n_|dist|rezultat|school_change|entrance_|grad_|class_|school_|dec|quart|med|Wages_hs|drop|Unemployment_hs|teacher|County|Year", perl = TRUE)) %>%
  select(-matches("_Level|Certification|Hometown|Long|County\\.|County_|ID|Inspection", perl = TRUE)) %>%
  ungroup()
# NOTE(review): this writes to the same file name that was just read, i.e. the
# input is overwritten with the reduced table -- confirm this is intended.
saveRDS(data_teacher_anon, "data_teacher_anon", compress = TRUE)
colnames(data_teacher_anon)
head(data_teacher_anon)
# Interactive inspection of the loaded tables: first rows, column names, and
# the two Education_num columns of the student-teacher table.
head(data_student_teacher_raw, n = 6L)
colnames(data_student_teacher_raw)

data_student_teacher_raw$Education_num
data_student_teacher_raw$Education_num.ro

colnames(data_student_exp_raw)
colnames(data_student_teacher_expenditure_anon)
